DNA methylation-based diagnosis and prognosis of pediatric AML
We propose to leverage machine learning tools to develop DNA methylation-based signatures of clinical utility in pediatric AML.
The AML Methylome
Interactive visualization of the diagnostic map of AML for pediatric, adolescent, and young adult patients, based solely on DNA methylation.
Show code cell outputs
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_18616\2526871669.py in <module>
5 output_path = '../Data/Processed_Data/'
6
----> 7 x_train = pd.read_pickle(PaCMAP_path+'embedding.pkl')
8 x_test = pd.read_pickle(PaCMAP_path+'embedding_test.pkl')
9
c:\users\flourenco\appdata\local\programs\python\python37\lib\site-packages\pandas\io\pickle.py in read_pickle(filepath_or_buffer, compression, storage_options)
215 # RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap]";
216 # expected "IO[bytes]"
--> 217 return pickle.load(handles.handle) # type: ignore[arg-type]
218 except excs_to_catch:
219 # e.g.
ValueError: unsupported pickle protocol: 5
from bokeh.layouts import layout
from bokeh.models import ColumnDataSource, Legend
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
# Factory for the empty, pre-configured figure that each tab is drawn on
def create_figure():
    """Return a new fixed-size (1000x600) figure titled for the atlas.

    The figure carries PaCMAP axis labels, pan / wheel-zoom / reset / save
    tools (pan and wheel-zoom active by default) and hover tooltips that read
    the "WHO Classification" and "Karyotype" columns of the plotted source.
    """
    hover_rows = [
        ("Diagnosis", "@{WHO Classification}"),
        ("Karyotype", "@Karyotype"),
    ]
    options = dict(
        title='The Pediatric AML Methylome Atlas',
        width=1000,
        height=600,
        sizing_mode='fixed',
        x_axis_label='PaCMAP 1',
        y_axis_label='PaCMAP 2',
        tools="pan,wheel_zoom,reset,save",
        active_drag="pan",
        active_scroll="wheel_zoom",
        tooltips=hover_rows,
    )
    return figure(**options)
# Draw one scatter renderer per category of `hue` and collect legend entries
def create_scatters(df, p, hue):
    """Plot every category of ``hue`` as its own scatter glyph on ``p``.

    Rows whose ``hue`` value is NaN are dropped. Categories are handled from
    most to least frequent, and the i-th category takes the i-th colour of
    ``custom_color_palette``.

    Returns a ``(renderers, items)`` tuple: the glyph renderers in drawing
    order, and matching ``(category, [renderer])`` pairs for a ``Legend``.
    """
    labelled = df[df[hue].notna()]
    ordered_categories = (
        labelled[hue].value_counts().sort_values(ascending=False).index.to_list()
    )
    subsets = [labelled[labelled[hue] == category] for category in ordered_categories]

    renderers, items = [], []
    for rank, subset in enumerate(subsets):
        label = subset[hue].head(1).values[0]
        shade = custom_color_palette[rank]
        glyph_renderer = p.scatter(
            x="PaCMAP 1", y="PaCMAP 2",
            source=ColumnDataSource(subset),
            fill_alpha=0.8, size=5,
            color=shade,
        )
        renderers.append(glyph_renderer)
        items.append((label, [glyph_renderer]))
    return renderers, items
# Create one tab (with its own figure) per hue column; the last entry of
# `cols` gets no tab.
tabs = Tabs(tabs=[TabPanel(child=create_figure(), title=title) for title in cols[:-1]],
            tabs_location='left')
# Each element of `points` is the (renderers, legend items) pair of one tab
points = [create_scatters(df, tab.child, hue=col) for tab, col in zip(tabs.tabs, cols[:-1])]

for tab, (renderers, items) in zip(tabs.tabs, points):
    tab.child.toolbar.logo = None
    tab.child.toolbar_location = 'above'
    # The legend goes in the side panel to the right of the plot area
    legend = Legend(items=items, location='top_left')
    tab.child.add_layout(legend, 'right')
    tab.child.legend.click_policy = 'hide'

# One slider drives the marker size of every renderer in every tab
all_renderers = [r for tab_renderers, _ in points for r in tab_renderers]
slider = Slider(title="Adjust datapoint size", start=0, end=20, step=1,
                value=all_renderers[0].glyph.size)
for r in all_renderers:
    slider.js_link("value", r.glyph, "size")

# Bind the result to `page`, not `layout`: rebinding `layout` would replace
# the imported bokeh function and break any later `layout(...)` call.
page = layout([[[tabs, slider]]])
show(page)
from bokeh.layouts import layout, gridplot
from bokeh.models import ColumnDataSource
# Factory for the empty 600x600 figure used inside each tab
def create_figure():
    """Return a new fixed-size figure with PaCMAP axes, tools and tooltips.

    Hover tooltips read the "WHO Classification" and "Karyotype" columns of
    whatever data source is later plotted on the figure.
    """
    hover_rows = [("Diagnosis", "@{WHO Classification}"),
                  ("Karyotype", "@Karyotype")]
    return figure(
        width=600,
        height=600,
        sizing_mode='fixed',
        x_axis_label='PaCMAP 1',
        y_axis_label='PaCMAP 2',
        tools="pan,wheel_zoom,reset,save",
        active_drag="pan",
        active_scroll="wheel_zoom",
        tooltips=hover_rows,
    )
def create_scatters(df, p, hue):
    """Plot every category of ``hue`` on ``p`` as a separately labelled glyph.

    Rows with a NaN ``hue`` are dropped. Categories are drawn from most to
    least frequent; the i-th one uses ``custom_color_palette[i]`` and gets a
    legend entry named after the category. Returns the list of renderers.
    """
    labelled = df[df[hue].notna()]
    ordered_categories = (
        labelled[hue].value_counts().sort_values(ascending=False).index.to_list()
    )
    subsets = [labelled[labelled[hue] == category] for category in ordered_categories]

    renderers = []
    for rank, subset in enumerate(subsets):
        label = subset[hue].head(1).values[0]
        shade = custom_color_palette[rank]
        renderers.append(
            p.scatter(x="PaCMAP 1", y="PaCMAP 2",
                      source=ColumnDataSource(subset),
                      fill_alpha=0.8, size=5,
                      color=shade, legend_label=label)
        )
    return renderers
# Left-hand tab set: one tab per hue column (the last entry of `cols` gets no tab)
tabs = Tabs(tabs=[TabPanel(child=create_figure(), title=title) for title in cols[:-1]],
            tabs_location='left')
points = [create_scatters(df, tab.child, hue=col) for tab, col in zip(tabs.tabs, cols[:-1])]

# Right-hand tab set with the same content, for side-by-side comparison.
# NOTE(review): the original left `tabs_location=` empty (a SyntaxError);
# 'right' mirrors the first tab set — confirm the intended placement.
tabs2 = Tabs(tabs=[TabPanel(child=create_figure(), title=title) for title in cols[:-1]],
             tabs_location='right')
points2 = [create_scatters(df, tab.child, hue=col) for tab, col in zip(tabs2.tabs, cols[:-1])]

# Apply the same clean-up to BOTH tab sets (the original looped over `tabs`
# twice and never touched `tabs2`)
for tab in [*tabs.tabs, *tabs2.tabs]:
    tab.child.toolbar.logo = None
    tab.child.legend.click_policy = 'hide'

# One slider drives the marker size of every renderer in both tab sets
all_renderers = [r for tab_renderers in points + points2 for r in tab_renderers]
slider = Slider(title="Adjust datapoint size", start=0, end=20, step=1,
                value=all_renderers[0].glyph.size)
for r in all_renderers:
    slider.js_link("value", r.glyph, "size")

grid = gridplot([[tabs, tabs2]])
# Bind the result to `page`, not `layout`, so the imported function survives
page = layout([[[grid, slider]]])
show(page)
from bokeh.layouts import layout
from bokeh.models import ColumnDataSource
# Build the blank figure that a tab's scatter plot is drawn on
def create_figure():
    """Return a fresh 600x600 fixed-size figure for one PaCMAP scatter plot.

    Pan and wheel-zoom are the active tools; tooltips show the
    "WHO Classification" and "Karyotype" columns of the plotted source.
    """
    settings = {
        "width": 600,
        "height": 600,
        "sizing_mode": 'fixed',
        "x_axis_label": 'PaCMAP 1',
        "y_axis_label": 'PaCMAP 2',
        "tools": "pan,wheel_zoom,reset,save",
        "active_drag": "pan",
        "active_scroll": "wheel_zoom",
        "tooltips": [("Diagnosis", "@{WHO Classification}"),
                     ("Karyotype", "@Karyotype")],
    }
    return figure(**settings)
def create_scatters(df, p, hue):
    """Add one legend-labelled scatter glyph per ``hue`` category to ``p``.

    NaN ``hue`` rows are excluded. Categories are ordered by descending
    frequency and coloured by position from ``custom_color_palette``.
    Returns the renderers in the order they were added.
    """
    valid_rows = df[df[hue].notna()]
    by_frequency = valid_rows[hue].value_counts().sort_values(ascending=False)
    groups = [valid_rows[valid_rows[hue] == value] for value in by_frequency.index.to_list()]

    renderers = []
    for position, group in enumerate(groups):
        legend_name = group[hue].head(1).values[0]
        group_color = custom_color_palette[position]
        group_source = ColumnDataSource(group)
        renderer = p.scatter(x="PaCMAP 1", y="PaCMAP 2", source=group_source,
                             fill_alpha=0.8, size=5,
                             color=group_color, legend_label=legend_name)
        renderers.append(renderer)
    return renderers
# Create one tab (with its own figure) per hue column; the last entry of
# `cols` gets no tab.
tabs = Tabs(tabs=[TabPanel(child=create_figure(), title=title) for title in cols[:-1]],
            tabs_location='left')
# zip() stops at the shorter input; slicing `cols` only makes the pairing explicit
points = [create_scatters(df, tab.child, hue=col) for tab, col in zip(tabs.tabs, cols[:-1])]
for tab in tabs.tabs:
    tab.child.toolbar.logo = None
    tab.child.legend.click_policy = 'hide'

# One slider drives the marker size of every renderer in every tab
all_renderers = [r for tab_renderers in points for r in tab_renderers]
slider = Slider(title="Adjust datapoint size", start=0, end=20, step=1,
                value=all_renderers[0].glyph.size)
for r in all_renderers:
    slider.js_link("value", r.glyph, "size")

# Bind the result to `page`, not `layout`: rebinding `layout` would replace
# the imported bokeh function and break any later `layout(...)` call.
page = layout([[[tabs, slider]]])
show(page)
# Echo the glyph of the first renderer of the first tab (each element of
# `points` is the list of renderers returned by create_scatters)
points[0][0].glyph
Scatter(
id = 'p18018', …)
from bokeh.layouts import layout
from bokeh.models import ColumnDataSource
# Produce the empty figure each tab starts from
def create_figure():
    """Return a new 600x600 fixed-size figure set up for a PaCMAP scatter.

    Tools: pan, wheel-zoom, reset, save (pan and wheel-zoom active).
    Tooltips: "WHO Classification" and "Karyotype" of the hovered point.
    """
    axis_labels = dict(x_axis_label='PaCMAP 1', y_axis_label='PaCMAP 2')
    interaction = dict(tools="pan,wheel_zoom,reset,save",
                       active_drag="pan",
                       active_scroll="wheel_zoom")
    hover_rows = [("Diagnosis", "@{WHO Classification}"),
                  ("Karyotype", "@Karyotype")]
    return figure(width=600, height=600, sizing_mode='fixed',
                  tooltips=hover_rows, **axis_labels, **interaction)
def create_scatters(df, p, hue):
    """Add one legend-labelled scatter glyph per ``hue`` category to ``p``.

    NaN ``hue`` rows are excluded; categories are ordered by descending
    frequency and coloured by position from ``custom_color_palette``.

    Unlike the renderer-returning variants, this version returns the figure
    ``p`` itself.
    """
    valid_rows = df[df[hue].notna()]
    by_frequency = valid_rows[hue].value_counts().sort_values(ascending=False)
    groups = [valid_rows[valid_rows[hue] == value] for value in by_frequency.index.to_list()]

    for position, group in enumerate(groups):
        legend_name = group[hue].head(1).values[0]
        group_color = custom_color_palette[position]
        p.scatter(x="PaCMAP 1", y="PaCMAP 2",
                  source=ColumnDataSource(group),
                  fill_alpha=0.8, size=5,
                  color=group_color, legend_label=legend_name)
    return p
# Create one tab (with its own figure) per hue column; the last entry of
# `cols` gets no tab.
tabs = Tabs(tabs=[TabPanel(child=create_figure(), title=title) for title in cols[:-1]],
            tabs_location='left')
# In this cell create_scatters returns the figure, so `points` holds figures
points = [create_scatters(df, tab.child, hue=col) for tab, col in zip(tabs.tabs, cols[:-1])]
for tab in tabs.tabs:
    tab.child.toolbar.logo = None
    tab.child.legend.click_policy = 'hide'

# NOTE: no size slider in this variant. A slider must be linked to each
# renderer's `.glyph`, and the figures stored in `points` have no `glyph`
# attribute (their renderers are reachable through `figure.renderers`).

# Bind the result to `page`, not `layout`: rebinding `layout` would replace
# the imported bokeh function and break any later `layout(...)` call.
page = layout([[[tabs]]])
show(page)
# NOTE: raises AttributeError. In this cell create_scatters returns the figure
# itself, so points[0] is a figure, and a figure has no `glyph` attribute.
points[0].glyph
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[12], line 1
----> 1 points[0].glyph
File c:\Users\flourenco\Desktop\projects\Methylation_Project\Code\.venv_data_vizualization\lib\site-packages\bokeh\core\has_props.py:360, in HasProps.__getattr__(self, name)
357 if isinstance(descriptor, property): # Python property
358 return super().__getattribute__(name)
--> 360 self._raise_attribute_error_with_matches(name, properties)
File c:\Users\flourenco\Desktop\projects\Methylation_Project\Code\.venv_data_vizualization\lib\site-packages\bokeh\core\has_props.py:368, in HasProps._raise_attribute_error_with_matches(self, name, properties)
365 if not matches:
366 matches, text = sorted(properties), "possible"
--> 368 raise AttributeError(f"unexpected attribute {name!r} to {self.__class__.__name__}, {text} attributes are {nice_join(matches)}")
AttributeError: unexpected attribute 'glyph' to figure, possible attributes are above, align, aspect_ratio, aspect_scale, background_fill_alpha, background_fill_color, below, border_fill_alpha, border_fill_color, center, classes, context_menu, css_classes, disabled, extra_x_ranges, extra_x_scales, extra_y_ranges, extra_y_scales, flow_mode, frame_align, frame_height, frame_width, height, height_policy, hidpi, hold_render, inner_height, inner_width, js_event_callbacks, js_property_callbacks, left, lod_factor, lod_interval, lod_threshold, lod_timeout, margin, match_aspect, max_height, max_width, min_border, min_border_bottom, min_border_left, min_border_right, min_border_top, min_height, min_width, name, outer_height, outer_width, outline_line_alpha, outline_line_cap, outline_line_color, outline_line_dash, outline_line_dash_offset, outline_line_join, outline_line_width, output_backend, renderers, reset_policy, resizable, right, sizing_mode, styles, stylesheets, subscribed_events, syncable, tags, title, title_location, toolbar, toolbar_inner, toolbar_location, toolbar_sticky, visible, width, width_policy, x_range, x_scale, y_range or y_scale
from bokeh.layouts import layout
# Factory for the blank figure behind each tab
def create_figure():
    """Return a new fixed-size 600x600 figure with PaCMAP axes and tooltips.

    Tooltips show the "WHO Classification" and "Karyotype" columns; pan and
    wheel-zoom are the active drag and scroll tools.
    """
    tooltip_spec = [
        ("Diagnosis", "@{WHO Classification}"),
        ("Karyotype", "@Karyotype"),
    ]
    blank = figure(
        width=600, height=600, sizing_mode='fixed',
        x_axis_label='PaCMAP 1', y_axis_label='PaCMAP 2',
        tools="pan,wheel_zoom,reset,save",
        active_drag="pan", active_scroll="wheel_zoom",
        tooltips=tooltip_spec,
    )
    return blank
# Single scatter renderer whose colour is mapped from the `hue` column
def create_scatter(df, p, hue):
    """Draw the non-NaN ``hue`` rows of ``df`` on ``p`` as one scatter glyph.

    Colours come from ``factor_cmap`` over ``custom_color_palette``, with the
    categories ordered from most to least frequent; the legend is grouped by
    ``hue``. Returns the renderer created by ``p.scatter``.
    """
    kept = df[df[hue].notna()]
    factors = kept[hue].value_counts().sort_values(ascending=False).index.to_list()
    plot_data = kept.copy()
    color_spec = factor_cmap(field_name=hue, palette=custom_color_palette,
                             factors=factors)
    return p.scatter(x="PaCMAP 1", y="PaCMAP 2", source=plot_data,
                     fill_alpha=0.8, size=5,
                     color=color_spec,
                     legend_group=hue)
# Create one tab (with its own figure) per hue column; the last entry of
# `cols` gets no tab.
tabs = Tabs(tabs=[TabPanel(child=create_figure(), title=title) for title in cols[:-1]],
            tabs_location='left')
# One renderer per tab; zip() stops at the shorter input, so slicing `cols`
# only makes the pairing with the tabs explicit
points = [create_scatter(df, tab.child, hue=col) for tab, col in zip(tabs.tabs, cols[:-1])]
for tab in tabs.tabs:
    tab.child.toolbar.logo = None

# One slider drives the marker size of the renderer in every tab
slider = Slider(title="Adjust datapoint size", start=0, end=20, step=1,
                value=points[0].glyph.size)
for renderer in points:
    slider.js_link("value", renderer.glyph, "size")

# Bind the result to `page`, not `layout`: rebinding `layout` would replace
# the imported bokeh function and break any later `layout(...)` call.
page = layout([[[tabs, slider]]])
show(page)
import pandas as pd
from bokeh.palettes import Spectral4
from bokeh.plotting import figure, output_file, show
from bokeh.sampledata.stocks import AAPL, GOOG, IBM, MSFT
# Interactive-legend example: one line per stock; clicking a legend entry
# hides the corresponding line.
p = figure(width=800, height=250, x_axis_type="datetime")
p.title.text = 'Click on legend entries to hide the corresponding lines'

for data, name, color in zip([AAPL, IBM, MSFT, GOOG], ["AAPL", "IBM", "MSFT", "GOOG"], Spectral4):
    # Use a dedicated name: the other cells of this notebook read a global
    # `df`, which a loop variable called `df` would silently overwrite.
    stock_df = pd.DataFrame(data)
    stock_df['date'] = pd.to_datetime(stock_df['date'])
    p.line(stock_df['date'], stock_df['close'], line_width=2, color=color, alpha=0.8,
           legend_label=name)

p.legend.location = "top_left"
p.legend.click_policy = "hide"
show(p)
from bokeh.core.enums import LegendLocation
from bokeh.io import show
from bokeh.layouts import gridplot
from bokeh.models import (Circle, ColumnDataSource, DataRange1d, Legend,
LinearAxis, PanTool, Plot, SaveTool, WheelZoomTool,
CategoricalColorMapper)
source = ColumnDataSource(data=df)

# Column whose categories determine the fill colour of the points
color_column = 'WHO Classification'

# Map each category (most frequent first) to a colour of the palette
color_mapper = CategoricalColorMapper(
    factors=df[color_column].value_counts().sort_values(ascending=False).index.to_list(),
    palette=custom_color_palette)

xdr = DataRange1d()
ydr = DataRange1d()
plot = Plot(
    x_range=xdr, y_range=ydr,
    width=600, height=600,
    min_border=0,
    toolbar_location='right',
)

# The colour field has to name the column the mapper's factors were taken
# from; the original referenced a placeholder 'category_column'.
circle_glyph = Circle(x="PaCMAP 1", y="PaCMAP 2", size=6,
                      line_color="black", fill_alpha=0.6,
                      fill_color={'field': color_column, 'transform': color_mapper})
# `circle` is the renderer returned by add_glyph; the legend item below
# refers to this renderer, not to the glyph
circle = plot.add_glyph(source, circle_glyph)

pan = PanTool()
wheel_zoom = WheelZoomTool()
preview_save = SaveTool()
plot.add_tools(pan, wheel_zoom, preview_save)

# Add axes
plot.add_layout(LinearAxis(), 'below')
plot.add_layout(LinearAxis(), 'left')
plot.add_layout(LinearAxis(), 'right')

# Legend placed in a separate, otherwise empty plot next to the main one
legend = Legend(
    items=[("PaCMAP Output", [circle])],
    location="center", orientation="vertical",
    border_line_color="black",
    title='Example Title'
)
legend_plot = Plot(
    width=200, height=600,
    toolbar_location=None,
)
legend_plot.add_layout(legend, 'center')

# Combine the two plots into a gridplot
grid = gridplot([[plot, legend_plot]])
show(grid)
from bokeh.core.enums import LegendLocation
from bokeh.io import show
from bokeh.layouts import gridplot
from bokeh.models import (Circle, ColumnDataSource, DataRange1d, Legend,
LinearAxis, PanTool, Plot, SaveTool, WheelZoomTool)
# Wrap the dataframe so the glyph can refer to its columns by name
source = ColumnDataSource(data=df)

# Auto-ranging axes and the low-level Plot they belong to
xdr, ydr = DataRange1d(), DataRange1d()
plot = Plot(x_range=xdr, y_range=ydr,
            width=600, height=600,
            min_border=0, toolbar_location='right')

# `circle` ends up bound to the renderer that add_glyph returns
circle = plot.add_glyph(
    source,
    Circle(x="PaCMAP 1", y="PaCMAP 2", size=6,
           line_color="red", fill_color="orange", fill_alpha=0.6),
)

# Interaction tools
pan, wheel_zoom, preview_save = PanTool(), WheelZoomTool(), SaveTool()
plot.add_tools(pan, wheel_zoom, preview_save)

# Axes below, left and right of the plot area
for side in ('below', 'left', 'right'):
    plot.add_layout(LinearAxis(), side)

# Legend for the circle renderer, hosted by a second, otherwise empty plot
legend = Legend(items=[("PaCMAP Output", [circle])],
                location="center",
                orientation="vertical",
                border_line_color="black",
                title='Example Title')
legend_plot = Plot(width=600, height=600, toolbar_location=None)
legend_plot.add_layout(legend, 'left')

# Show both plots side by side
grid = gridplot([[plot, legend_plot]])
show(grid)
WARNING:bokeh.core.validation.check:W-1000 (MISSING_RENDERERS): Plot has no renderers: Plot(id='p3643', ...)
# NOTE: raises TypeError. `source` is a ColumnDataSource instance, which is
# not callable.
source()
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[33], line 1
----> 1 source()
TypeError: 'ColumnDataSource' object is not callable
from numpy import cos, linspace, pi, sin
from bokeh.core.enums import LegendLocation
from bokeh.io import show
from bokeh.layouts import gridplot
from bokeh.models import (Circle, ColumnDataSource, DataRange1d, Legend,
LinearAxis, PanTool, Plot, SaveTool, WheelZoomTool)
source = ColumnDataSource(data=df.copy())

xdr = DataRange1d()
ydr = DataRange1d()
plot = Plot(
    x_range=xdr, y_range=ydr,
    width=600, height=600,
    min_border=0,
    toolbar_location='right',
)

# The embedding columns are named "PaCMAP 1" / "PaCMAP 2" (with a space)
# everywhere else in this notebook; the original "PaCMAP1" / "PaCMAP2"
# pointed the glyph at column names used nowhere else.
circle_glyph = Circle(x="PaCMAP 1", y="PaCMAP 2", size=6,
                      line_color="red", fill_color="orange", fill_alpha=0.6)
# `circle` is the renderer returned by add_glyph (used by the legend item)
circle = plot.add_glyph(source, circle_glyph)

pan = PanTool()
wheel_zoom = WheelZoomTool()
preview_save = SaveTool()
plot.add_tools(pan, wheel_zoom, preview_save)

# Add axes
plot.add_layout(LinearAxis(), 'below')
plot.add_layout(LinearAxis(), 'left')
plot.add_layout(LinearAxis(), 'right')

# Legend with a deliberately long label, hosted by a second, empty plot
legend = Legend(
    items=[("circleeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee", [circle])],
    location="center", orientation="vertical",
    border_line_color="black",
    title='Example Title'
)
legend_plot = Plot(
    width=600, height=600,
    toolbar_location=None,
)
legend_plot.add_layout(legend, 'left')

# Combine the two plots into a gridplot
grid = gridplot([[plot, legend_plot]])
show(grid)
WARNING:bokeh.core.validation.check:W-1000 (MISSING_RENDERERS): Plot has no renderers: Plot(id='p15743', ...)
from numpy import cos, linspace, pi, sin
from bokeh.core.enums import LegendLocation
from bokeh.io import show
from bokeh.models import (Circle, ColumnDataSource, DataRange1d, Legend,
LinearAxis, PanTool, Plot, SaveTool, WheelZoomTool)
# 400 samples of sin(x) over [-2*pi, 2*pi]
x = linspace(-2*pi, 2*pi, 400)
y = sin(x)
source = ColumnDataSource(data=dict(x=x, y=y))

# Auto-ranging axes and the low-level Plot they belong to
xdr, ydr = DataRange1d(), DataRange1d()
plot = Plot(x_range=xdr, y_range=ydr,
            width=600, height=600,
            min_border=0, toolbar_location='right')

# `circle` ends up bound to the renderer that add_glyph returns
circle = plot.add_glyph(
    source,
    Circle(x="x", y="y", size=6,
           line_color="red", fill_color="orange", fill_alpha=0.6),
)

# Interaction tools
pan, wheel_zoom, preview_save = PanTool(), WheelZoomTool(), SaveTool()
plot.add_tools(pan, wheel_zoom, preview_save)

# Add axes (Note it's important to add these before adding legends in side panels)
for side in ('below', 'left', 'right'):
    plot.add_layout(LinearAxis(), side)
def add_legend(location, orientation, side):
    """Attach a one-entry legend for the ``circle`` renderer to ``plot``.

    ``location`` and ``orientation`` are passed straight to ``Legend``;
    ``side`` is the panel of the module-level ``plot`` the legend is added to.
    """
    plot.add_layout(
        Legend(items=[("circle", [circle])],
               location=location,
               orientation=orientation,
               border_line_color="black",
               title='Example Title'),
        side,
    )
# Add a vertically oriented legend, with location "center", to the panel
# below the plot, then display the plot
add_legend("center", "vertical", "below")
show(plot)
from bokeh.layouts import layout
# Label columns to attach to the embedding
cols = [
    'PaCMAP Output',
    'WHO Classification',
    'FAB',
    'FLT3 ITD',
    'Age group (years)',
    'Complex Karyotype',
    'Primary Cytogenetic Code',
    'Karyotype',
]

# Embedding joined with the selected label columns; reset_index() moves the
# index into an ordinary column
df = (
    x_train
    .join(y_train[cols])
    .reset_index()
)

# Document-wide Bokeh theme
curdoc().theme = 'light_minimal'
# Blank-figure factory used once per tab
def create_figure():
    """Return a new fixed-size 600x600 figure for a PaCMAP scatter plot.

    Pan and wheel-zoom are active by default; tooltips show the
    "WHO Classification" and "Karyotype" columns of the hovered point.
    """
    hover_rows = [("Diagnosis", "@{WHO Classification}"),
                  ("Karyotype", "@Karyotype")]
    options = dict(
        width=600,
        height=600,
        sizing_mode='fixed',
        x_axis_label='PaCMAP 1',
        y_axis_label='PaCMAP 2',
        tools="pan,wheel_zoom,reset,save",
        active_drag="pan",
        active_scroll="wheel_zoom",
        tooltips=hover_rows,
    )
    return figure(**options)
# One scatter renderer, coloured by the categories of `hue`
def create_scatter(df, p, hue):
    """Plot the rows of ``df`` that have a ``hue`` value on figure ``p``.

    The colour is a ``factor_cmap`` over ``custom_color_palette`` whose
    factors are the ``hue`` categories in descending order of frequency; the
    legend is grouped by ``hue``. Returns the renderer from ``p.scatter``.
    """
    has_label = df[hue].notna()
    subset = df[has_label]
    ranked = subset[hue].value_counts().sort_values(ascending=False)
    plot_data = subset.copy()
    color_spec = factor_cmap(field_name=hue,
                             palette=custom_color_palette,
                             factors=ranked.index.to_list())
    return p.scatter(x="PaCMAP 1", y="PaCMAP 2",
                     source=plot_data,
                     fill_alpha=0.8, size=5,
                     color=color_spec,
                     legend_group=hue)
# Create one tab (with its own figure) per hue column; the last entry of
# `cols` gets no tab.
tabs = Tabs(tabs=[TabPanel(child=create_figure(), title=title) for title in cols[:-1]],
            tabs_location='left')
# One renderer per tab; zip() stops at the shorter input, so slicing `cols`
# only makes the pairing with the tabs explicit
points = [create_scatter(df, tab.child, hue=col) for tab, col in zip(tabs.tabs, cols[:-1])]
for tab in tabs.tabs:
    tab.child.toolbar.logo = None

# One slider drives the marker size of the renderer in every tab
slider = Slider(title="Adjust datapoint size", start=0, end=20, step=1,
                value=points[0].glyph.size)
for renderer in points:
    slider.js_link("value", renderer.glyph, "size")

# Heading shown above the tabs
div = Div(text="<b>The AML Diagnostic Map</b>\nInteractive visualization of the pediatric AML methylome:",
          width=200, height=85)

# Bind the result to `page`, not `layout`: rebinding `layout` would replace
# the imported bokeh function and break any later `layout(...)` call.
page = layout([[[div, tabs, slider]]])
show(page)
from bokeh.layouts import layout
# Label columns to attach to the embedding
cols = [
    'PaCMAP Output',
    'WHO Classification',
    'FAB',
    'FLT3 ITD',
    'Age group (years)',
    'Complex Karyotype',
    'Primary Cytogenetic Code',
    'Karyotype',
]

# Embedding joined with the selected label columns; reset_index() moves the
# index into an ordinary column
df = (
    x_train
    .join(y_train[cols])
    .reset_index()
)

# Document-wide Bokeh theme
curdoc().theme = 'light_minimal'
# Blank-figure factory used once per tab
def create_figure():
    """Return a new fixed-size 600x600 figure for a PaCMAP scatter plot.

    Tools are pan, wheel-zoom, reset and save; tooltips show the
    "WHO Classification" and "Karyotype" columns of the hovered point.
    """
    tooltip_spec = [
        ("Diagnosis", "@{WHO Classification}"),
        ("Karyotype", "@Karyotype"),
    ]
    return figure(
        width=600,
        height=600,
        sizing_mode='fixed',
        x_axis_label='PaCMAP 1',
        y_axis_label='PaCMAP 2',
        tools="pan,wheel_zoom,reset,save",
        active_drag="pan",
        active_scroll="wheel_zoom",
        tooltips=tooltip_spec,
    )
# One scatter renderer, coloured by the categories of `hue`
def create_scatter(df, p, hue):
    """Plot the rows of ``df`` that have a ``hue`` value on figure ``p``.

    Categories are ranked by descending frequency and mapped onto
    ``custom_color_palette`` with ``factor_cmap``; the legend is grouped by
    ``hue``. Returns the renderer produced by ``p.scatter``.
    """
    subset = df[df[hue].notna()]
    ranked_categories = (
        subset[hue].value_counts().sort_values(ascending=False).index.to_list()
    )
    plot_data = subset.copy()
    color_spec = factor_cmap(field_name=hue, palette=custom_color_palette,
                             factors=ranked_categories)
    renderer = p.scatter(x="PaCMAP 1", y="PaCMAP 2", source=plot_data,
                         fill_alpha=0.8, size=5,
                         color=color_spec, legend_group=hue)
    return renderer
# Create one tab (with its own figure) per hue column; the last entry of
# `cols` gets no tab.
tabs = Tabs(tabs=[TabPanel(child=create_figure(), title=title) for title in cols[:-1]],
            tabs_location='left')
# One renderer per tab; zip() stops at the shorter input, so slicing `cols`
# only makes the pairing with the tabs explicit
points = [create_scatter(df, tab.child, hue=col) for tab, col in zip(tabs.tabs, cols[:-1])]
for tab in tabs.tabs:
    tab.child.toolbar.logo = None

# One slider drives the marker size of the renderer in every tab
slider = Slider(title="Adjust datapoint size", start=0, end=20, step=1,
                value=points[0].glyph.size)
for renderer in points:
    slider.js_link("value", renderer.glyph, "size")

# Heading shown above the tabs
div = Div(text="<b>The AML Diagnostic Map</b>\nInteractive visualization of the pediatric AML methylome:",
          width=200, height=85)

# Bind the result to `page`, not `layout`: rebinding `layout` would replace
# the imported bokeh function and break any later `layout(...)` call.
page = layout([[[div, tabs, slider]]])
show(page)
# Label columns to attach to the embedding
cols = [
    'Primary Cytogenetic Code',
    'FAB',
    'FLT3 ITD',
    'Age group (years)',
    'WHO Classification',
    'Complex Karyotype',
    'Karyotype',
]

# Embedding joined with the selected label columns; reset_index() moves the
# index into an ordinary column
df = (
    x_train
    .join(y_train[cols])
    .reset_index()
)

# Document-wide Bokeh theme
curdoc().theme = 'light_minimal'
# Blank-figure factory used once per tab
def create_figure():
    """Return a new fixed-size 600x600 figure for a PaCMAP scatter plot.

    Tooltips show the "index" and "Karyotype" columns of the hovered point;
    pan and wheel-zoom are the active drag and scroll tools.
    """
    hover_rows = [("Sample", "@index"), ("Karyotype", "@Karyotype")]
    options = dict(
        width=600,
        height=600,
        sizing_mode='fixed',
        x_axis_label='PaCMAP 1',
        y_axis_label='PaCMAP 2',
        tools="pan,wheel_zoom,reset,save",
        active_drag="pan",
        active_scroll="wheel_zoom",
        tooltips=hover_rows,
    )
    return figure(**options)
# One scatter renderer, coloured by the categories of `hue`
def create_scatter(df, p, hue):
    """Plot the rows of ``df`` that have a ``hue`` value on figure ``p``.

    The colour is a ``factor_cmap`` over ``custom_color_palette`` with the
    ``hue`` categories ordered from most to least frequent; the legend is
    grouped by ``hue``. Returns the renderer produced by ``p.scatter``.
    """
    labelled = df[df[hue].notna()]
    counts = labelled[hue].value_counts()
    factors = counts.sort_values(ascending=False).index.to_list()
    plot_data = labelled.copy()
    color_spec = factor_cmap(field_name=hue, palette=custom_color_palette,
                             factors=factors)
    return p.scatter(
        x="PaCMAP 1",
        y="PaCMAP 2",
        source=plot_data,
        fill_alpha=0.8,
        size=5,
        color=color_spec,
        legend_group=hue,
    )
# Imported here so this cell does not rely on the name `layout` still being
# the bokeh function when it runs
from bokeh.layouts import layout

# One tab per hue column, titled with the very column it is coloured by.
# The original hard-coded seven titles in a different order than `cols`, so
# zip() paired titles with the wrong columns (e.g. the 'PaCMAP Output' tab was
# coloured by 'Primary Cytogenetic Code'). The last entry of `cols`
# ('Karyotype') is shown in the tooltip only and gets no tab.
tabs = Tabs(tabs=[TabPanel(child=create_figure(), title=col) for col in cols[:-1]],
            tabs_location='left')
points = [create_scatter(df, tab.child, hue=col) for tab, col in zip(tabs.tabs, cols[:-1])]
for tab in tabs.tabs:
    tab.child.toolbar.logo = None

# One slider drives the marker size of the renderer in every tab
slider = Slider(title="Adjust datapoint size", start=0, end=20, step=1,
                value=points[0].glyph.size)
for renderer in points:
    slider.js_link("value", renderer.glyph, "size")

# Heading shown above the tabs
div = Div(text="<b>The AML Diagnostic Map</b>\nInteractive visualization of the pediatric AML methylome:",
          width=200, height=85)

# Bind the result to `page`, not `layout`, so the imported function survives
page = layout([[[div, tabs, slider]]])
show(page)
from bokeh.layouts import layout
# Label columns joined onto the embedding. Named `label_cols` rather than
# `list` so the builtin `list` type is not shadowed.
label_cols = ['Primary Cytogenetic Code', 'FAB', 'FLT3 ITD', 'Age group (years)',
              'WHO Classification', 'Complex Karyotype', 'Karyotype']

df = x_train.join(y_train[label_cols]).reset_index()  # join embedding with labels
# Constant column: gives a single-colour overview tab of the whole embedding
df['PaCMAP Output'] = 'PaCMAP Output'

# Columns that get a tab of their own. 'Karyotype' (last entry of
# `label_cols`) is only shown in the tooltip.
tab_cols = ['PaCMAP Output'] + label_cols[:-1]

curdoc().theme = 'light_minimal'


def fig():
    """
    Figure specs for Bokeh plot

    Returns
    -------
    bokeh figure
        Empty 600x600 fixed-size figure with PaCMAP axis labels, pan /
        wheel-zoom / reset / save tools and "Sample" / "Karyotype" tooltips.
    """
    p = figure(
        width=600,
        height=600,
        sizing_mode='fixed',
        x_axis_label='PaCMAP 1',
        y_axis_label='PaCMAP 2',
        tools="pan,wheel_zoom, reset, save",
        active_drag="pan",
        active_scroll="wheel_zoom",
        tooltips=[("Sample", "@index"),
                  ("Karyotype", "@Karyotype")])
    return p


def scatter(df, p, hue):
    """
    Scatter plot of embedding with color by hue

    Parameters
    ----------
    df : pandas.DataFrame
        Embedding joined with the label columns
    p : bokeh.plotting.figure.Figure
        Bokeh figure object
    hue : str
        Column name of df to color by

    Returns
    -------
    points : bokeh.models.renderers.GlyphRenderer
        Bokeh glyph renderer object
    """
    df = df[~df[hue].isna()]  # df where df hue is not nan
    # Categories ordered from most to least frequent
    factors = df[hue].value_counts().sort_values(ascending=False).index.to_list()
    points = p.scatter(x="PaCMAP 1",
                       y="PaCMAP 2",
                       source=df.copy(),
                       fill_alpha=0.8,
                       size=5,
                       color=factor_cmap(field_name=hue,
                                         palette=custom_color_palette,
                                         factors=factors),
                       legend_group=hue)
    return points


def create_tabs(df, hues=None):
    """
    Build one tab per hue column, each holding its own scatter plot

    Parameters
    ----------
    df : pandas.DataFrame
        Embedding joined with the label columns
    hues : list of str, optional
        Columns to create a tab for; each tab is titled with its column
        name. Defaults to the module-level ``tab_cols``.

    Returns
    -------
    tabs : bokeh Tabs
        Tabs widget with the tab strip on the left
    """
    if hues is None:
        hues = tab_cols

    def create_tab(title, hue):
        p = fig()
        scatter(df, p, hue=hue)
        p.toolbar.logo = None
        return TabPanel(child=p, title=title)

    # The original iterated `for title, hue in list`, which tries to unpack
    # each column-name string into two variables; title and hue are the same
    # column name here.
    return Tabs(tabs=[create_tab(hue, hue) for hue in hues],
                tabs_location='left')


# Build the tabs (the original never called create_tabs, so `tabs` and
# `points1` .. `points7` were undefined)
tabs = create_tabs(df)

# Renderers of every tab's figure, collected so one slider can drive them all
renderers = [r for tab in tabs.tabs for r in tab.child.renderers]

slider = Slider(
    title="Adjust datapoint size",
    start=0,
    end=20,
    step=1,
    value=renderers[0].glyph.size)
for r in renderers:
    slider.js_link("value", r.glyph, "size")

# create layout (bound to `page` so the imported `layout` function survives)
page = layout([[[tabs, slider]]])

# show result
show(page)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[3], line 88
72 return tabs
75 div = Div(
76 text="""
77 <b> The AML Diagnostic Map</b>
(...)
80 width=200,
81 height=85)
83 slider = Slider(
84 title="Adjust datapoint size",
85 start=0,
86 end=20,
87 step=1,
---> 88 value=(points1.glyph.size))
90 slider.js_link("value", points1.glyph, "size")
91 slider.js_link("value", points2.glyph, "size")
NameError: name 'points1' is not defined
Table of Contents
Specific Aims
Introduction
Data Preparation
AML Diagnostic Map
Survival Analyses
Immune Cell Deconvolution